import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import plotly.express as px
# IPython line magic: only valid inside a notebook/IPython session.
# Presumably here so matplotlib figures render inline in the cell output.
%matplotlib inline
import plotly
# Initialise plotly's offline notebook mode before any plotly figure is shown.
# NOTE(review): assumed to be required for fig.show() to render in this
# notebook environment - confirm against the plotly version in use.
plotly.offline.init_notebook_mode()
# Read the crime records and derive a `date` column from the YEAR, MONTH and
# DAY columns.
#
# The date is assembled with a single vectorised pd.to_datetime call instead
# of constructing datetime.date objects row by row, which is much slower on a
# large frame. pd.to_datetime expects the component columns to be named
# year/month/day, hence the lower-casing rename on the temporary selection
# (the columns of crime_df itself keep their upper-case names).
# `.dt.date` converts the timestamps to plain datetime.date objects, so the
# values print as YYYY-MM-DD without a time component.
crime_df = (
    pd.read_csv("csv/5_crime_data_REQ100529.csv")
    .assign(
        date=lambda x: pd.to_datetime(
            x[["YEAR", "MONTH", "DAY"]].rename(columns=str.lower)
        ).dt.date
    )
)
Time series to analyse behaviours over time:
Map to notice troublesome areas.
def _year_month_label(frame):
    """Return 'YYYY-MM' strings built from the YEAR and MONTH columns of *frame*."""
    year_text = frame.YEAR.astype(str).str[0:4]
    # Zero-pad the month so the labels sort chronologically as strings.
    month_text = frame.MONTH.astype(str).str.zfill(2)
    return year_text + '-' + month_text


# Number of records per (year_month, TYPE) pair; `size` holds the count.
to_plot = (
    crime_df
    .assign(year_month=_year_month_label)
    .groupby(['year_month', 'TYPE'], as_index=False)
    .size()
)

# One line per crime type across the monthly buckets.
sns.lineplot(x='year_month', y='size', hue='TYPE', data=to_plot)
# Place the legend outside the axes, to the right of the plot area.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0)
plt.xticks(rotation=90)
plt.title('Time series of crime types')
plt.show()
# Aggregate to one row per neighbourhood: mean latitude/longitude plus the
# number of records with a non-null `date` (stored in the `date` column, so
# from here on `map_df['date']` is a count, not a calendar date).
map_df = crime_df.groupby('NEIGHBOURHOOD', as_index=False).agg(
    {'Latitude': 'mean', 'Longitude': 'mean', 'date': 'count'}
)

# Draw the figure: one marker per neighbourhood, coloured and sized by its
# record count.
# The map centre and the marker sizes are computed from `map_df`, the frame
# being plotted. The previous code referenced an undefined name `df` here,
# which raises NameError when the notebook is run from a fresh kernel; the
# size vector must also line up row-for-row with the plotted frame.
fig = px.scatter_mapbox(
    map_df,
    lat='Latitude',
    lon='Longitude',
    color='date',
    opacity=0.5,
    center=dict(
        lon=map_df['Longitude'].mean(),
        lat=map_df['Latitude'].mean(),
    ),
    zoom=11,
    size=map_df['date'] * 500,
)
fig.update_layout(mapbox_style='carto-positron', width=1200, height=900)
fig.show()
Determine the number of crimes per crime type for the above days.
def _report_extreme_day(label, day_row):
    """Print the headline for one day and display its per-type breakdown.

    Parameters
    ----------
    label : str
        Word inserted into the headline ('more' or 'less').
    day_row : pandas.DataFrame
        Single-row slice of ``crime_counts`` with `date` and `crime_count`
        columns. ``.item()`` raises ValueError if it does not hold exactly
        one row.

    Returns
    -------
    pandas.Series
        The `date` column of ``day_row`` (one element), which is also the
        filter used to select that day's records from ``crime_df``.
    """
    day = day_row['date']
    total = day_row['crime_count'].item()
    print(f'Day with {label} crime counts: {day.item()} with {total} crimes')
    print('Crime type distribution:')
    # Records of that day, counted per (date, TYPE), largest count first.
    display(
        crime_df.loc[crime_df['date'].isin(day)]
        .groupby(['date', 'TYPE'])
        .size()
        .sort_values(ascending=False)
    )
    return day


# Get crime counts per day, sorted so the busiest day is the first row and
# the quietest day is the last row.
crime_counts = (
    crime_df
    .groupby('date', as_index=False)
    .agg(crime_count=('TYPE', 'count'))
    .sort_values('crime_count', ascending=False)
)

# The two reports used to be copy-pasted and each contained a bare
# `display()` call, which shows nothing; both now go through one helper.
top_crime_days = _report_extreme_day('more', crime_counts.head(1))
print(' ')
print(' ')
less_crime_days = _report_extreme_day('less', crime_counts.tail(1))
Day with more crime counts: 2011-06-15 with 649 crimes Crime type distribution:
date TYPE
2011-06-15 Mischief 367
Break and Enter Commercial 174
Offence Against a Person 35
Theft from Vehicle 31
Theft of Bicycle 13
Other Theft 11
Break and Enter Residential/Other 10
Theft of Vehicle 6
Vehicle Collision or Pedestrian Struck (with Injury) 2
dtype: int64
Day with less crime counts: 2010-11-24 with 36 crimes Crime type distribution:
date TYPE
2010-11-24 Theft from Vehicle 10
Offence Against a Person 6
Mischief 4
Other Theft 4
Theft of Vehicle 4
Break and Enter Commercial 3
Break and Enter Residential/Other 3
Homicide 1
Theft of Bicycle 1
dtype: int64